from pyspark.sql import SparkSession
from pyecharts.charts import Bar
import pyecharts.options as opts

def main():
    """Render a bar chart of the 15 most frequent ``companyTypeString`` values.

    Reads the 51jobs JSON dataset from HDFS, counts rows per
    ``companyTypeString``, keeps the 15 largest groups (descending by count),
    and renders them with pyecharts via ``Bar.render()`` (called without
    arguments, so pyecharts chooses the output path).
    """
    spark = SparkSession.builder.appName("Jobs1").master("yarn").getOrCreate()
    try:
        df = spark.read.json("hdfs://node1:8020/input/datasets/51jobs.json")
        top_types = (
            df.groupBy("companyTypeString")
            .count()
            .orderBy("count", ascending=False)
            .limit(15)
        )
        # Collect exactly once: running two separate actions for the x and y
        # values can return rows in a different order (or different rows) when
        # counts tie, which would misalign labels and bars.
        rows = top_types.collect()
    finally:
        # Release the YARN application's resources even if the job fails.
        spark.stop()

    # Unpack Row objects into plain Python values for pyecharts. Index by
    # column name: ``row.count`` would resolve to the tuple method, not the
    # aggregated column.
    xaxis_data = [row["companyTypeString"] for row in rows]
    yaxis_data = [row["count"] for row in rows]

    bar = (
        Bar()
        .add_xaxis(xaxis_data)
        .add_yaxis("企业性质", yaxis_data)
        .set_global_opts(
            xaxis_opts=opts.AxisOpts(
                # interval=0 forces every category label to be drawn; ECharts
                # documents this option as a number, so pass an int rather
                # than the string "0". Labels are rotated to avoid overlap.
                axislabel_opts={"interval": 0, "rotate": 45}
            )
        )
    )
    bar.render()


if __name__ == '__main__':
    main()
